{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import time\n",
    "import sys\n",
    "if \"../\" not in sys.path:\n",
    "  sys.path.append(\"../\") \n",
    "from lib.utils import read_data_from_file, sign, sigmoid\n",
    "from lib.logistic_reg import logistic_reg, logistic_reg_sgd\n",
    "from sklearn.preprocessing import PolynomialFeatures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ridge_reg(X, y, lambd=0.01):\n",
    "    \"\"\"\n",
    "    Linear Regression Algorithm(Closed Form)\n",
    "    Args:\n",
    "        X: 数据\n",
    "        y: 预测值\n",
    "        lambd: 罚项系数\n",
    "    Returns:\n",
    "        w_reg: 特征权重\n",
    "    \"\"\"    \n",
    "    w_reg = np.linalg.inv((X.T).dot(X)+lambd*np.eye(len(X[0]))).dot(X.T).dot(y)\n",
    "    return w_reg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_err_rate(X, y, w):\n",
    "    err_rate = (sign(X.dot(w)) != y).mean()\n",
    "    return err_rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data_train shape:  (200, 3)\n",
      "data_test shape:  (1000, 3)\n"
     ]
    }
   ],
   "source": [
    "# 数据读取\n",
    "data_train = read_data_from_file('hw4_train.dat') \n",
    "print('data_train shape: ', data_train.shape)\n",
    "data_test = read_data_from_file('hw4_test.dat')\n",
    "print('data_test shape: ', data_test.shape)\n",
    "\n",
    "y = data_train[:,-1]\n",
    "X = np.concatenate((np.ones((data_train.shape[0],1)), data_train[:,:-1]), axis=1)\n",
    "y_test = data_test[:,-1]\n",
    "X_test = np.concatenate((np.ones((data_test.shape[0],1)), data_test[:,:-1]), axis=1)\n",
    "\n",
    "times = 2000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "13. Consider regularized linear regression (also called ridge regression) for classification$$w_{reg}=argmin_{w}(\\frac{\\lambda}{N}||w||^2+\\frac{1}{N}||Xw−y||^2).$$\n",
    "Run the algorithm on the following data set as $D$:<br/><br/>https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_algo/hw4_train.dat<br/><br/>and the following set for evaluating $E_{out}$<br/><br/>https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_algo/hw4_test.dat<br/><br/>Because the data sets are for classification, please consider only the 0/1 error for all Questions below.<br/>Let $\\lambda = 10$, which of the followings is the corresponding $E_{in}$ and $E_{out}$?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ein:  0.05 Eout:  0.045\n"
     ]
    }
   ],
   "source": [
    "w_reg = ridge_reg(X, y, lambd=10)\n",
    "print('Ein: ', get_err_rate(X,y,w_reg), 'Eout: ', get_err_rate(X_test, y_test, w_reg))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "14. Following the previous Question, aong $\\log_{10} \\lambda= \\left\\{2, 1, 0, -1, \\ldots, -8, -9, -10 \\right\\}$. What is the $\\lambda$ with the minimum $E_{in}$? Compute $\\lambda$ and its corresponding $E_{in}$ and $E_{out}$ then select the closest answer. Break the tie by selecting the largest $\\lambda$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1.e+02 1.e+01 1.e+00 1.e-01 1.e-02 1.e-03 1.e-04 1.e-05 1.e-06 1.e-07\n",
      " 1.e-08 1.e-09 1.e-10]\n"
     ]
    }
   ],
   "source": [
    "lambds = np.power(10., np.arange(2, -11, -1))\n",
    "print(lambds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lambda:  1e-08 Ein:  0.015 Eout:  0.02\n"
     ]
    }
   ],
   "source": [
    "e_in, e_out = np.inf, np.inf\n",
    "cor_lambda = 0\n",
    "for lambd in lambds:\n",
    "    w_reg = ridge_reg(X, y, lambd=lambd)\n",
    "    e_in_tmp = get_err_rate(X,y,w_reg)\n",
    "    if e_in > e_in_tmp:\n",
    "        e_in = e_in_tmp   \n",
    "        e_out = get_err_rate(X_test, y_test, w_reg)\n",
    "        cor_lambda = lambd\n",
    "print('lambda: ', cor_lambda, 'Ein: ', e_in, 'Eout: ', e_out)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "15. Following the previous Question, among $\\log_{10} \\lambda= \\left\\{2, 1, 0, -1, \\ldots, -8, -9, -10 \\right\\}$. What is the $\\lambda$ with the minimum $E_{out}$? Compute $\\lambda$ and the corresponding $E_{in}$ and $E_{out}$ then select the closest answer. Break the tie by selecting the largest $\\lambda$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lambda:  1e-07 Ein:  0.03 Eout:  0.015\n"
     ]
    }
   ],
   "source": [
    "e_in, e_out = np.inf, np.inf\n",
    "cor_lambda = 0\n",
    "for lambd in lambds:\n",
    "    w_reg = ridge_reg(X, y, lambd=lambd)\n",
    "    e_out_tmp = get_err_rate(X_test, y_test, w_reg)\n",
    "    if e_out > e_out_tmp:\n",
    "        e_out = e_out_tmp   \n",
    "        e_in = get_err_rate(X, y, w_reg)\n",
    "        cor_lambda = lambd\n",
    "print('lambda: ', cor_lambda, 'Ein: ', e_in, 'Eout: ', e_out)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "16. Now split the given training examples in $D$ to the first $120$ examples for $D_{train}$ and $80$ for $D_{val}$. {Ideally, you should randomly do the $120/80$ split. Because the given examples are already randomly permuted, however, we would use a fixed split for the purpose of this problem.}<br/><br/>Run the algorithm on $D_{train}$ to get $g^{-}_{\\lambda}$, and validate $g^{-}_{\\lambda}$ with $D_{val}$. Among $\\log_{10} \\lambda= \\left\\{2, 1, 0, -1, \\ldots, -8, -9, -10 \\right\\}$. What is the $\\lambda$ with the minimum $E_{train}$($g^{-}_{\\lambda}$)? Compute $\\lambda$ and the corresponding $E_{train}$($g^{-}_{\\lambda}$), $E_{val}$($g^{-}_{\\lambda}$) and $E_{out}$($g^{-}_{\\lambda}$) then select the closet answer. Break the tie by selecting the largest $\\lambda$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = []\n",
    "for lambd in lambds:\n",
    "    w_reg = ridge_reg(X[:120], y[:120], lambd=lambd)\n",
    "    result.append((lambd, get_err_rate(X[:120], y[:120], w_reg), get_err_rate(X[-80:], y[-80:], w_reg), get_err_rate(X_test, y_test, w_reg)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lambda: 1e-08, E_train: 0.0, E_val: 0.05, E_out: 0.025\n"
     ]
    }
   ],
   "source": [
    "list.sort(result, key=lambda x: x[1])\n",
    "print('lambda: {}, E_train: {}, E_val: {}, E_out: {}'.format(*result[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "17. Following the previous Question, among $\\log_{10} \\lambda= \\left\\{2, 1, 0, -1, \\ldots, -8, -9, -10 \\right\\}$. What is the $\\lambda$ with the minimum $E_{val}$($g^{-}_\\lambda$})? Compute $\\lambda$ and the corresponding $E_{train}$($g^{-}_{\\lambda}$), $E_{val}$($g^{-}_{\\lambda}$) and $E_{out}$($g^{-}_{\\lambda}$) then select the closet answer. Break the tie by selecting the largest $\\lambda$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lambda: 1.0, E_train: 0.03333333333333333, E_val: 0.0375, E_out: 0.028\n"
     ]
    }
   ],
   "source": [
    "list.sort(result, key=lambda x: x[2])\n",
    "print('lambda: {}, E_train: {}, E_val: {}, E_out: {}'.format(*result[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "18. Run the algorithm with the optimal $\\lambda$ of the previous Question on the whole $D$ to get $g_{\\lambda}$. Compute $E_{in}$($g_{\\lambda}$) and $E_{out}$($g_{\\lambda}$) then select the closet answer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "E_in:  0.035 E_out:  0.02\n"
     ]
    }
   ],
   "source": [
    "w_reg = ridge_reg(X, y, lambd=result[0][0])\n",
    "print('E_in: ', get_err_rate(X, y, w_reg), 'E_out: ', get_err_rate(X_test, y_test, w_reg))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "19. For Questions 19-20, split the given training examples in $D$ to five folds, the first $40$ being fold $1$, the next $40$ being fold $2$, and so on. Again, we take a fixed split because the given examples are already randomly permuted.<br/><br/>Among $\\log_{10} \\lambda= \\left\\{2, 1, 0, -1, \\ldots, -8, -9, -10 \\right\\}$. What is the $\\lambda$ with the minimum $E_{cv}$ comes from the five folds defined above? Compute $\\lambda$ and the corresponding $E_{cv}$ then select the closet answer. Break the tie by selecting the largest $\\lambda$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = []\n",
    "for lambd in lambds:\n",
    "    e_cv = 0\n",
    "    for i in range(0, 5):\n",
    "        cv_index = range(i*40, i*40 + 40)\n",
    "        train_index = list(set(range(len(X))) - set(cv_index))\n",
    "        \n",
    "        w_reg = ridge_reg(X[train_index], y[train_index], lambd=lambd)\n",
    "        e_cv += get_err_rate(X[cv_index], y[cv_index], w_reg)\n",
    "    result.append((lambd, e_cv/5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lambd: 1e-08, E_cv: 0.03\n"
     ]
    }
   ],
   "source": [
    "list.sort(result, key=lambda x: x[1])\n",
    "print('lambd: {}, E_cv: {}'.format(*result[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "20. Run the algorithm with the optimal $\\lambda$ of the previous problem on the whole $D$ to get $g_{\\lambda}$. Compute $E_{in}$($g_{\\lambda}$) and $E_{out}$($g_{\\lambda}$) then select the closet answer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "E_in:  0.015 E_out:  0.02\n"
     ]
    }
   ],
   "source": [
    "w_reg = ridge_reg(X, y, lambd=result[0][0])\n",
    "print('E_in: ', get_err_rate(X, y, w_reg), 'E_out: ', get_err_rate(X_test, y_test, w_reg))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
